; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mcpu=knl < %s | FileCheck %s -check-prefix=CHECK64-KNL
; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mcpu=skx < %s | FileCheck %s -check-prefix=CHECK64-SKX
; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=knl < %s | FileCheck %s -check-prefix=CHECK32-KNL
; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=skx < %s | FileCheck %s -check-prefix=CHECK32-SKX

; Make sure we spill and reload the high-numbered zmm registers and the k (mask) registers with the right encoding.

define x86_intrcc void @foo(i8* byval(i8) %frame) {
; CHECK64-KNL-LABEL: foo:
; CHECK64-KNL:       ## %bb.0:
; CHECK64-KNL-NEXT:    pushq %rax ## encoding: [0x50]
; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 16
; CHECK64-KNL-NEXT:    pushq %r11 ## encoding: [0x41,0x53]
; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 24
; CHECK64-KNL-NEXT:    pushq %r10 ## encoding: [0x41,0x52]
; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 32
; CHECK64-KNL-NEXT:    pushq %r9 ## encoding: [0x41,0x51]
; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 40
; CHECK64-KNL-NEXT:    pushq %r8 ## encoding: [0x41,0x50]
; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 48
; CHECK64-KNL-NEXT:    pushq %rdi ## encoding: [0x57]
; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 56
; CHECK64-KNL-NEXT:    pushq %rsi ## encoding: [0x56]
; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 64
; CHECK64-KNL-NEXT:    pushq %rdx ## encoding: [0x52]
; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 72
; CHECK64-KNL-NEXT:    pushq %rcx ## encoding: [0x51]
; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 80
; CHECK64-KNL-NEXT:    subq $2096, %rsp ## encoding: [0x48,0x81,0xec,0x30,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    ## imm = 0x830
; CHECK64-KNL-NEXT:    kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xb4,0x24,0x2c,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xac,0x24,0x2a,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xa4,0x24,0x28,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x9c,0x24,0x26,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x94,0x24,0x24,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x8c,0x24,0x22,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x84,0x24,0x20,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x7c,0x24,0x1f]
; CHECK64-KNL-NEXT:    vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e]
; CHECK64-KNL-NEXT:    vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x6c,0x24,0x1d]
; CHECK64-KNL-NEXT:    vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x64,0x24,0x1c]
; CHECK64-KNL-NEXT:    vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x5c,0x24,0x1b]
; CHECK64-KNL-NEXT:    vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x54,0x24,0x1a]
; CHECK64-KNL-NEXT:    vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x4c,0x24,0x19]
; CHECK64-KNL-NEXT:    vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x44,0x24,0x18]
; CHECK64-KNL-NEXT:    vmovups %zmm23, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x7c,0x24,0x17]
; CHECK64-KNL-NEXT:    vmovups %zmm22, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x74,0x24,0x16]
; CHECK64-KNL-NEXT:    vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x6c,0x24,0x15]
; CHECK64-KNL-NEXT:    vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x64,0x24,0x14]
; CHECK64-KNL-NEXT:    vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x5c,0x24,0x13]
; CHECK64-KNL-NEXT:    vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x54,0x24,0x12]
; CHECK64-KNL-NEXT:    vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x4c,0x24,0x11]
; CHECK64-KNL-NEXT:    vmovups %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x44,0x24,0x10]
; CHECK64-KNL-NEXT:    vmovups %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x7c,0x24,0x0f]
; CHECK64-KNL-NEXT:    vmovups %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x74,0x24,0x0e]
; CHECK64-KNL-NEXT:    vmovups %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x6c,0x24,0x0d]
; CHECK64-KNL-NEXT:    vmovups %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x64,0x24,0x0c]
; CHECK64-KNL-NEXT:    vmovups %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x5c,0x24,0x0b]
; CHECK64-KNL-NEXT:    vmovups %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x54,0x24,0x0a]
; CHECK64-KNL-NEXT:    vmovups %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x4c,0x24,0x09]
; CHECK64-KNL-NEXT:    vmovups %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x44,0x24,0x08]
; CHECK64-KNL-NEXT:    vmovups %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
; CHECK64-KNL-NEXT:    vmovups %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
; CHECK64-KNL-NEXT:    vmovups %zmm5, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
; CHECK64-KNL-NEXT:    vmovups %zmm4, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
; CHECK64-KNL-NEXT:    vmovups %zmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
; CHECK64-KNL-NEXT:    vmovups %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
; CHECK64-KNL-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
; CHECK64-KNL-NEXT:    vmovups %zmm0, (%rsp) ## 64-byte Spill
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 2176
; CHECK64-KNL-NEXT:    .cfi_offset %rcx, -80
; CHECK64-KNL-NEXT:    .cfi_offset %rdx, -72
; CHECK64-KNL-NEXT:    .cfi_offset %rsi, -64
; CHECK64-KNL-NEXT:    .cfi_offset %rdi, -56
; CHECK64-KNL-NEXT:    .cfi_offset %r8, -48
; CHECK64-KNL-NEXT:    .cfi_offset %r9, -40
; CHECK64-KNL-NEXT:    .cfi_offset %r10, -32
; CHECK64-KNL-NEXT:    .cfi_offset %r11, -24
; CHECK64-KNL-NEXT:    .cfi_offset %rax, -16
; CHECK64-KNL-NEXT:    .cfi_offset %xmm0, -2176
; CHECK64-KNL-NEXT:    .cfi_offset %xmm1, -2112
; CHECK64-KNL-NEXT:    .cfi_offset %xmm2, -2048
; CHECK64-KNL-NEXT:    .cfi_offset %xmm3, -1984
; CHECK64-KNL-NEXT:    .cfi_offset %xmm4, -1920
; CHECK64-KNL-NEXT:    .cfi_offset %xmm5, -1856
; CHECK64-KNL-NEXT:    .cfi_offset %xmm6, -1792
; CHECK64-KNL-NEXT:    .cfi_offset %xmm7, -1728
; CHECK64-KNL-NEXT:    .cfi_offset %xmm8, -1664
; CHECK64-KNL-NEXT:    .cfi_offset %xmm9, -1600
; CHECK64-KNL-NEXT:    .cfi_offset %xmm10, -1536
; CHECK64-KNL-NEXT:    .cfi_offset %xmm11, -1472
; CHECK64-KNL-NEXT:    .cfi_offset %xmm12, -1408
; CHECK64-KNL-NEXT:    .cfi_offset %xmm13, -1344
; CHECK64-KNL-NEXT:    .cfi_offset %xmm14, -1280
; CHECK64-KNL-NEXT:    .cfi_offset %xmm15, -1216
; CHECK64-KNL-NEXT:    .cfi_offset %xmm16, -1152
; CHECK64-KNL-NEXT:    .cfi_offset %xmm17, -1088
; CHECK64-KNL-NEXT:    .cfi_offset %xmm18, -1024
; CHECK64-KNL-NEXT:    .cfi_offset %xmm19, -960
; CHECK64-KNL-NEXT:    .cfi_offset %xmm20, -896
; CHECK64-KNL-NEXT:    .cfi_offset %xmm21, -832
; CHECK64-KNL-NEXT:    .cfi_offset %xmm22, -768
; CHECK64-KNL-NEXT:    .cfi_offset %xmm23, -704
; CHECK64-KNL-NEXT:    .cfi_offset %xmm24, -640
; CHECK64-KNL-NEXT:    .cfi_offset %xmm25, -576
; CHECK64-KNL-NEXT:    .cfi_offset %xmm26, -512
; CHECK64-KNL-NEXT:    .cfi_offset %xmm27, -448
; CHECK64-KNL-NEXT:    .cfi_offset %xmm28, -384
; CHECK64-KNL-NEXT:    .cfi_offset %xmm29, -320
; CHECK64-KNL-NEXT:    .cfi_offset %xmm30, -256
; CHECK64-KNL-NEXT:    .cfi_offset %xmm31, -192
; CHECK64-KNL-NEXT:    .cfi_offset %k0, -96
; CHECK64-KNL-NEXT:    .cfi_offset %k1, -94
; CHECK64-KNL-NEXT:    .cfi_offset %k2, -92
; CHECK64-KNL-NEXT:    .cfi_offset %k3, -90
; CHECK64-KNL-NEXT:    .cfi_offset %k4, -88
; CHECK64-KNL-NEXT:    .cfi_offset %k5, -86
; CHECK64-KNL-NEXT:    .cfi_offset %k6, -84
; CHECK64-KNL-NEXT:    .cfi_offset %k7, -82
; CHECK64-KNL-NEXT:    cld ## encoding: [0xfc]
; CHECK64-KNL-NEXT:    callq _bar ## encoding: [0xe8,A,A,A,A]
; CHECK64-KNL-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: reloc_branch_4byte_pcrel
; CHECK64-KNL-NEXT:    vmovups (%rsp), %zmm0 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm3 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm4 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm5 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x44,0x24,0x08]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x4c,0x24,0x09]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x54,0x24,0x0a]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x5c,0x24,0x0b]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x64,0x24,0x0c]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x6c,0x24,0x0d]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x74,0x24,0x0e]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x7c,0x24,0x0f]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x44,0x24,0x10]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x4c,0x24,0x11]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x54,0x24,0x12]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x5c,0x24,0x13]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x64,0x24,0x14]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x6c,0x24,0x15]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x74,0x24,0x16]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x7c,0x24,0x17]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm24 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x44,0x24,0x18]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x4c,0x24,0x19]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm26 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x54,0x24,0x1a]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm27 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x5c,0x24,0x1b]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm28 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x64,0x24,0x1c]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x6c,0x24,0x1d]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm30 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e]
; CHECK64-KNL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm31 ## 64-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x7c,0x24,0x1f]
; CHECK64-KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x84,0x24,0x20,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x8c,0x24,0x22,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x94,0x24,0x24,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x9c,0x24,0x26,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xa4,0x24,0x28,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xac,0x24,0x2a,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xb4,0x24,0x2c,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; CHECK64-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xbc,0x24,0x2e,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    addq $2096, %rsp ## encoding: [0x48,0x81,0xc4,0x30,0x08,0x00,0x00]
; CHECK64-KNL-NEXT:    ## imm = 0x830
; CHECK64-KNL-NEXT:    popq %rcx ## encoding: [0x59]
; CHECK64-KNL-NEXT:    popq %rdx ## encoding: [0x5a]
; CHECK64-KNL-NEXT:    popq %rsi ## encoding: [0x5e]
; CHECK64-KNL-NEXT:    popq %rdi ## encoding: [0x5f]
; CHECK64-KNL-NEXT:    popq %r8 ## encoding: [0x41,0x58]
; CHECK64-KNL-NEXT:    popq %r9 ## encoding: [0x41,0x59]
; CHECK64-KNL-NEXT:    popq %r10 ## encoding: [0x41,0x5a]
; CHECK64-KNL-NEXT:    popq %r11 ## encoding: [0x41,0x5b]
; CHECK64-KNL-NEXT:    popq %rax ## encoding: [0x58]
; CHECK64-KNL-NEXT:    iretq ## encoding: [0x48,0xcf]
;
; CHECK64-SKX-LABEL: foo:
; CHECK64-SKX:       ## %bb.0:
; CHECK64-SKX-NEXT:    pushq %rax ## encoding: [0x50]
; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 16
; CHECK64-SKX-NEXT:    pushq %r11 ## encoding: [0x41,0x53]
; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 24
; CHECK64-SKX-NEXT:    pushq %r10 ## encoding: [0x41,0x52]
; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 32
; CHECK64-SKX-NEXT:    pushq %r9 ## encoding: [0x41,0x51]
; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 40
; CHECK64-SKX-NEXT:    pushq %r8 ## encoding: [0x41,0x50]
; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 48
; CHECK64-SKX-NEXT:    pushq %rdi ## encoding: [0x57]
; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 56
; CHECK64-SKX-NEXT:    pushq %rsi ## encoding: [0x56]
; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 64
; CHECK64-SKX-NEXT:    pushq %rdx ## encoding: [0x52]
; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 72
; CHECK64-SKX-NEXT:    pushq %rcx ## encoding: [0x51]
; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 80
; CHECK64-SKX-NEXT:    subq $2160, %rsp ## encoding: [0x48,0x81,0xec,0x70,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    ## imm = 0x870
; CHECK64-SKX-NEXT:    kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq %k3, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x7c,0x24,0x1f]
; CHECK64-SKX-NEXT:    vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e]
; CHECK64-SKX-NEXT:    vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x6c,0x24,0x1d]
; CHECK64-SKX-NEXT:    vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x64,0x24,0x1c]
; CHECK64-SKX-NEXT:    vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x5c,0x24,0x1b]
; CHECK64-SKX-NEXT:    vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x54,0x24,0x1a]
; CHECK64-SKX-NEXT:    vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x4c,0x24,0x19]
; CHECK64-SKX-NEXT:    vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x44,0x24,0x18]
; CHECK64-SKX-NEXT:    vmovups %zmm23, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x7c,0x24,0x17]
; CHECK64-SKX-NEXT:    vmovups %zmm22, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x74,0x24,0x16]
; CHECK64-SKX-NEXT:    vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x6c,0x24,0x15]
; CHECK64-SKX-NEXT:    vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x64,0x24,0x14]
; CHECK64-SKX-NEXT:    vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x5c,0x24,0x13]
; CHECK64-SKX-NEXT:    vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x54,0x24,0x12]
; CHECK64-SKX-NEXT:    vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x4c,0x24,0x11]
; CHECK64-SKX-NEXT:    vmovups %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x44,0x24,0x10]
; CHECK64-SKX-NEXT:    vmovups %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x7c,0x24,0x0f]
; CHECK64-SKX-NEXT:    vmovups %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x74,0x24,0x0e]
; CHECK64-SKX-NEXT:    vmovups %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x6c,0x24,0x0d]
; CHECK64-SKX-NEXT:    vmovups %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x64,0x24,0x0c]
; CHECK64-SKX-NEXT:    vmovups %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x5c,0x24,0x0b]
; CHECK64-SKX-NEXT:    vmovups %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x54,0x24,0x0a]
; CHECK64-SKX-NEXT:    vmovups %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x4c,0x24,0x09]
; CHECK64-SKX-NEXT:    vmovups %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x44,0x24,0x08]
; CHECK64-SKX-NEXT:    vmovups %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
; CHECK64-SKX-NEXT:    vmovups %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
; CHECK64-SKX-NEXT:    vmovups %zmm5, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
; CHECK64-SKX-NEXT:    vmovups %zmm4, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
; CHECK64-SKX-NEXT:    vmovups %zmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
; CHECK64-SKX-NEXT:    vmovups %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
; CHECK64-SKX-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
; CHECK64-SKX-NEXT:    vmovups %zmm0, (%rsp) ## 64-byte Spill
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 2240
; CHECK64-SKX-NEXT:    .cfi_offset %rcx, -80
; CHECK64-SKX-NEXT:    .cfi_offset %rdx, -72
; CHECK64-SKX-NEXT:    .cfi_offset %rsi, -64
; CHECK64-SKX-NEXT:    .cfi_offset %rdi, -56
; CHECK64-SKX-NEXT:    .cfi_offset %r8, -48
; CHECK64-SKX-NEXT:    .cfi_offset %r9, -40
; CHECK64-SKX-NEXT:    .cfi_offset %r10, -32
; CHECK64-SKX-NEXT:    .cfi_offset %r11, -24
; CHECK64-SKX-NEXT:    .cfi_offset %rax, -16
; CHECK64-SKX-NEXT:    .cfi_offset %xmm0, -2240
; CHECK64-SKX-NEXT:    .cfi_offset %xmm1, -2176
; CHECK64-SKX-NEXT:    .cfi_offset %xmm2, -2112
; CHECK64-SKX-NEXT:    .cfi_offset %xmm3, -2048
; CHECK64-SKX-NEXT:    .cfi_offset %xmm4, -1984
; CHECK64-SKX-NEXT:    .cfi_offset %xmm5, -1920
; CHECK64-SKX-NEXT:    .cfi_offset %xmm6, -1856
; CHECK64-SKX-NEXT:    .cfi_offset %xmm7, -1792
; CHECK64-SKX-NEXT:    .cfi_offset %xmm8, -1728
; CHECK64-SKX-NEXT:    .cfi_offset %xmm9, -1664
; CHECK64-SKX-NEXT:    .cfi_offset %xmm10, -1600
; CHECK64-SKX-NEXT:    .cfi_offset %xmm11, -1536
; CHECK64-SKX-NEXT:    .cfi_offset %xmm12, -1472
; CHECK64-SKX-NEXT:    .cfi_offset %xmm13, -1408
; CHECK64-SKX-NEXT:    .cfi_offset %xmm14, -1344
; CHECK64-SKX-NEXT:    .cfi_offset %xmm15, -1280
; CHECK64-SKX-NEXT:    .cfi_offset %xmm16, -1216
; CHECK64-SKX-NEXT:    .cfi_offset %xmm17, -1152
; CHECK64-SKX-NEXT:    .cfi_offset %xmm18, -1088
; CHECK64-SKX-NEXT:    .cfi_offset %xmm19, -1024
; CHECK64-SKX-NEXT:    .cfi_offset %xmm20, -960
; CHECK64-SKX-NEXT:    .cfi_offset %xmm21, -896
; CHECK64-SKX-NEXT:    .cfi_offset %xmm22, -832
; CHECK64-SKX-NEXT:    .cfi_offset %xmm23, -768
; CHECK64-SKX-NEXT:    .cfi_offset %xmm24, -704
; CHECK64-SKX-NEXT:    .cfi_offset %xmm25, -640
; CHECK64-SKX-NEXT:    .cfi_offset %xmm26, -576
; CHECK64-SKX-NEXT:    .cfi_offset %xmm27, -512
; CHECK64-SKX-NEXT:    .cfi_offset %xmm28, -448
; CHECK64-SKX-NEXT:    .cfi_offset %xmm29, -384
; CHECK64-SKX-NEXT:    .cfi_offset %xmm30, -320
; CHECK64-SKX-NEXT:    .cfi_offset %xmm31, -256
; CHECK64-SKX-NEXT:    .cfi_offset %k0, -144
; CHECK64-SKX-NEXT:    .cfi_offset %k1, -136
; CHECK64-SKX-NEXT:    .cfi_offset %k2, -128
; CHECK64-SKX-NEXT:    .cfi_offset %k3, -120
; CHECK64-SKX-NEXT:    .cfi_offset %k4, -112
; CHECK64-SKX-NEXT:    .cfi_offset %k5, -104
; CHECK64-SKX-NEXT:    .cfi_offset %k6, -96
; CHECK64-SKX-NEXT:    .cfi_offset %k7, -88
; CHECK64-SKX-NEXT:    cld ## encoding: [0xfc]
; CHECK64-SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK64-SKX-NEXT:    callq _bar ## encoding: [0xe8,A,A,A,A]
; CHECK64-SKX-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: reloc_branch_4byte_pcrel
; CHECK64-SKX-NEXT:    vmovups (%rsp), %zmm0 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm3 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm4 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm5 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x44,0x24,0x08]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x4c,0x24,0x09]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x54,0x24,0x0a]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x5c,0x24,0x0b]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x64,0x24,0x0c]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x6c,0x24,0x0d]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x74,0x24,0x0e]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x7c,0x24,0x0f]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x44,0x24,0x10]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x4c,0x24,0x11]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x54,0x24,0x12]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x5c,0x24,0x13]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x64,0x24,0x14]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x6c,0x24,0x15]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x74,0x24,0x16]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x7c,0x24,0x17]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm24 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x44,0x24,0x18]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x4c,0x24,0x19]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm26 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x54,0x24,0x1a]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm27 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x5c,0x24,0x1b]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm28 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x64,0x24,0x1c]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x6c,0x24,0x1d]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm30 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e]
; CHECK64-SKX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm31 ## 64-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x7c,0x24,0x1f]
; CHECK64-SKX-NEXT:    kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 8-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 8-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 8-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 8-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 8-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 8-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload
; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    addq $2160, %rsp ## encoding: [0x48,0x81,0xc4,0x70,0x08,0x00,0x00]
; CHECK64-SKX-NEXT:    ## imm = 0x870
; CHECK64-SKX-NEXT:    popq %rcx ## encoding: [0x59]
; CHECK64-SKX-NEXT:    popq %rdx ## encoding: [0x5a]
; CHECK64-SKX-NEXT:    popq %rsi ## encoding: [0x5e]
; CHECK64-SKX-NEXT:    popq %rdi ## encoding: [0x5f]
; CHECK64-SKX-NEXT:    popq %r8 ## encoding: [0x41,0x58]
; CHECK64-SKX-NEXT:    popq %r9 ## encoding: [0x41,0x59]
; CHECK64-SKX-NEXT:    popq %r10 ## encoding: [0x41,0x5a]
; CHECK64-SKX-NEXT:    popq %r11 ## encoding: [0x41,0x5b]
; CHECK64-SKX-NEXT:    popq %rax ## encoding: [0x58]
; CHECK64-SKX-NEXT:    iretq ## encoding: [0x48,0xcf]
;
; CHECK32-KNL-LABEL: foo:
; CHECK32-KNL:       ## %bb.0:
; CHECK32-KNL-NEXT:    pushl %edx ## encoding: [0x52]
; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-KNL-NEXT:    pushl %ecx ## encoding: [0x51]
; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 12
; CHECK32-KNL-NEXT:    pushl %eax ## encoding: [0x50]
; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 16
; CHECK32-KNL-NEXT:    subl $560, %esp ## encoding: [0x81,0xec,0x30,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    ## imm = 0x230
; CHECK32-KNL-NEXT:    kmovw %k7, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xb4,0x24,0x2c,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw %k5, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xac,0x24,0x2a,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw %k4, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0xa4,0x24,0x28,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw %k3, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x9c,0x24,0x26,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x94,0x24,0x24,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x8c,0x24,0x22,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x91,0x84,0x24,0x20,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    vmovups %zmm7, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
; CHECK32-KNL-NEXT:    vmovups %zmm6, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
; CHECK32-KNL-NEXT:    vmovups %zmm5, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
; CHECK32-KNL-NEXT:    vmovups %zmm4, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
; CHECK32-KNL-NEXT:    vmovups %zmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
; CHECK32-KNL-NEXT:    vmovups %zmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
; CHECK32-KNL-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
; CHECK32-KNL-NEXT:    vmovups %zmm0, (%esp) ## 64-byte Spill
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 576
; CHECK32-KNL-NEXT:    .cfi_offset %eax, -16
; CHECK32-KNL-NEXT:    .cfi_offset %ecx, -12
; CHECK32-KNL-NEXT:    .cfi_offset %edx, -8
; CHECK32-KNL-NEXT:    .cfi_offset %xmm0, -576
; CHECK32-KNL-NEXT:    .cfi_offset %xmm1, -512
; CHECK32-KNL-NEXT:    .cfi_offset %xmm2, -448
; CHECK32-KNL-NEXT:    .cfi_offset %xmm3, -384
; CHECK32-KNL-NEXT:    .cfi_offset %xmm4, -320
; CHECK32-KNL-NEXT:    .cfi_offset %xmm5, -256
; CHECK32-KNL-NEXT:    .cfi_offset %xmm6, -192
; CHECK32-KNL-NEXT:    .cfi_offset %xmm7, -128
; CHECK32-KNL-NEXT:    .cfi_offset %k0, -32
; CHECK32-KNL-NEXT:    .cfi_offset %k1, -30
; CHECK32-KNL-NEXT:    .cfi_offset %k2, -28
; CHECK32-KNL-NEXT:    .cfi_offset %k3, -26
; CHECK32-KNL-NEXT:    .cfi_offset %k4, -24
; CHECK32-KNL-NEXT:    .cfi_offset %k5, -22
; CHECK32-KNL-NEXT:    .cfi_offset %k6, -20
; CHECK32-KNL-NEXT:    .cfi_offset %k7, -18
; CHECK32-KNL-NEXT:    cld ## encoding: [0xfc]
; CHECK32-KNL-NEXT:    calll _bar ## encoding: [0xe8,A,A,A,A]
; CHECK32-KNL-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
; CHECK32-KNL-NEXT:    vmovups (%esp), %zmm0 ## 64-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 ## 64-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm2 ## 64-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm3 ## 64-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm4 ## 64-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm5 ## 64-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm6 ## 64-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
; CHECK32-KNL-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm7 ## 64-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 2-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x84,0x24,0x20,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x8c,0x24,0x22,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x94,0x24,0x24,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 2-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0x9c,0x24,0x26,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 2-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xa4,0x24,0x28,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xac,0x24,0x2a,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xb4,0x24,0x2c,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload
; CHECK32-KNL-NEXT:    ## encoding: [0xc5,0xf8,0x90,0xbc,0x24,0x2e,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    addl $560, %esp ## encoding: [0x81,0xc4,0x30,0x02,0x00,0x00]
; CHECK32-KNL-NEXT:    ## imm = 0x230
; CHECK32-KNL-NEXT:    popl %eax ## encoding: [0x58]
; CHECK32-KNL-NEXT:    popl %ecx ## encoding: [0x59]
; CHECK32-KNL-NEXT:    popl %edx ## encoding: [0x5a]
; CHECK32-KNL-NEXT:    iretl ## encoding: [0xcf]
;
; CHECK32-SKX-LABEL: foo:
; CHECK32-SKX:       ## %bb.0:
; CHECK32-SKX-NEXT:    pushl %edx ## encoding: [0x52]
; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-SKX-NEXT:    pushl %ecx ## encoding: [0x51]
; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 12
; CHECK32-SKX-NEXT:    pushl %eax ## encoding: [0x50]
; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 16
; CHECK32-SKX-NEXT:    subl $624, %esp ## encoding: [0x81,0xec,0x70,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    ## imm = 0x270
; CHECK32-SKX-NEXT:    kmovq %k7, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq %k5, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq %k4, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq %k3, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    vmovups %zmm7, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
; CHECK32-SKX-NEXT:    vmovups %zmm6, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
; CHECK32-SKX-NEXT:    vmovups %zmm5, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
; CHECK32-SKX-NEXT:    vmovups %zmm4, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
; CHECK32-SKX-NEXT:    vmovups %zmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
; CHECK32-SKX-NEXT:    vmovups %zmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
; CHECK32-SKX-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
; CHECK32-SKX-NEXT:    vmovups %zmm0, (%esp) ## 64-byte Spill
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 640
; CHECK32-SKX-NEXT:    .cfi_offset %eax, -16
; CHECK32-SKX-NEXT:    .cfi_offset %ecx, -12
; CHECK32-SKX-NEXT:    .cfi_offset %edx, -8
; CHECK32-SKX-NEXT:    .cfi_offset %xmm0, -640
; CHECK32-SKX-NEXT:    .cfi_offset %xmm1, -576
; CHECK32-SKX-NEXT:    .cfi_offset %xmm2, -512
; CHECK32-SKX-NEXT:    .cfi_offset %xmm3, -448
; CHECK32-SKX-NEXT:    .cfi_offset %xmm4, -384
; CHECK32-SKX-NEXT:    .cfi_offset %xmm5, -320
; CHECK32-SKX-NEXT:    .cfi_offset %xmm6, -256
; CHECK32-SKX-NEXT:    .cfi_offset %xmm7, -192
; CHECK32-SKX-NEXT:    .cfi_offset %k0, -80
; CHECK32-SKX-NEXT:    .cfi_offset %k1, -72
; CHECK32-SKX-NEXT:    .cfi_offset %k2, -64
; CHECK32-SKX-NEXT:    .cfi_offset %k3, -56
; CHECK32-SKX-NEXT:    .cfi_offset %k4, -48
; CHECK32-SKX-NEXT:    .cfi_offset %k5, -40
; CHECK32-SKX-NEXT:    .cfi_offset %k6, -32
; CHECK32-SKX-NEXT:    .cfi_offset %k7, -24
; CHECK32-SKX-NEXT:    cld ## encoding: [0xfc]
; CHECK32-SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK32-SKX-NEXT:    calll _bar ## encoding: [0xe8,A,A,A,A]
; CHECK32-SKX-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
; CHECK32-SKX-NEXT:    vmovups (%esp), %zmm0 ## 64-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 ## 64-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm2 ## 64-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm3 ## 64-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm4 ## 64-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm5 ## 64-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm6 ## 64-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
; CHECK32-SKX-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm7 ## 64-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 8-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 8-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 8-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 8-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 8-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 8-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 8-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 8-byte Reload
; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    addl $624, %esp ## encoding: [0x81,0xc4,0x70,0x02,0x00,0x00]
; CHECK32-SKX-NEXT:    ## imm = 0x270
; CHECK32-SKX-NEXT:    popl %eax ## encoding: [0x58]
; CHECK32-SKX-NEXT:    popl %ecx ## encoding: [0x59]
; CHECK32-SKX-NEXT:    popl %edx ## encoding: [0x5a]
; CHECK32-SKX-NEXT:    iretl ## encoding: [0xcf]
  call void @bar()
  ret void
}

; External function: declared here without a body and called from @foo above.
; NOTE(review): presumably this opaque call is what makes the interrupt handler
; save and restore every vector and mask register around it -- confirm.
declare void @bar()

